New Functionalities
Many of the interesting conclusions that can be drawn from this dataset are associated with:
- Product price variability over time.
- Product availability over time.
- Product offer prices variability over time.
For people who don’t have access to the entire database, some new functionalities for the detection of variability of those variables could be useful. Here I propose some of them.
Download Jupyter Notebook for this section
Table of contents
Price variability by product
This function allows the user to know how many times the price (and availability and price promotion if apply) of a product changed in the sampling time period.
product_variability({product ID})
product_variability <- function(prod,price_var=1){
date_range <- function(date1,date2){
date1= format(as.Date(date1, format = "%Y-%m-%d"), "%b-%y")
date2= format(as.Date(date2, format = "%Y-%m-%d"), "%b-%y")
drange <- paste(date2,date1,sep="-")
return(drange)
}
price_range <- function(price_min,price_max){
prange <- paste(price_min,price_max,sep="-")
return(prange)
}
#Detect variability in col column
variability <- function(df, col,product){
df <- subset(df, !is.na(df[[col]]))
var=0
if (nrow(df)>1){
row_value <- df[1, col]
for (row in 2:nrow(df)){
if(df[row, col]!= row_value) {
row_value <- df[row, col]
var <- var + 1
}
}
}
return(var)
}
#Get prices for product ID
p_prices<-product_prices(prod)
#Get how many times the price changes over the date range
p_var <- variability(p_prices,"price",prod)
#Get more information if the price change more or equal to price_var times
if (p_var >= price_var){
retailer_id <- product(prod)$retailer_id
product<-product(prod)$product
total_samples<-nrow(p_prices)
date_range <- date_range(p_prices$'time'[1],p_prices$'time'[total_samples])
price_range <- price_range(min(p_prices$'price'),max(p_prices$'price'))
#Check availability variability
if (all(is.na(p_prices[['available']]))){
av_var <- "NA"
}
else{
av_var <- variability(p_prices,"available",prod)
}
#Check promotions variability
if (all(is.na(p_prices[['price_promotion']]))){
prom_var <- "NA"
}
else{
prom_var <- variability(p_prices,"price_promotion",prod)
}
#Save the data and convert indo a dataframe
output<-list(retailer_id,prod,product,price_range,p_var,av_var,prom_var,total_samples,date_range)
output<-as.data.frame(output)
names(output) <- c('retailer_id','product_id','product','price_range','price_variability','avail_variability','prom_variability','total_samples', 'date_range')
}
else{
output <- data.frame()
}
return(output)
}
Example
product_variability(587936)
| retailer_id | product_id | product | price_range | price_variability | avail_variability | prom_variability | total_samples | date_range |
|---|---|---|---|---|---|---|---|---|
| 52 | 587936 | JVC LT-50N750A 50"FHD SMART L.E.D TV | 4890-6999 | 8 | 0 | NA | 15 | Feb-20-May-20 |
Product 587936 changed price 8 times in the sampling period (Feb-20-May-20). With no variation in availability and no data about promotions. We can check this here:
product_prices(587936)
| product_id | time | price | price_promotion | price_effective | available |
|---|---|---|---|---|---|
| 587936 | 2020-05-25 00:37:58 | 6999 | NA | 6999 | TRUE |
| 587936 | 2020-05-18 00:36:11 | 4999 | NA | 4999 | TRUE |
| 587936 | 2020-05-11 05:00:26 | 4999 | NA | 4999 | TRUE |
| 587936 | 2020-05-04 00:36:21 | 5699 | NA | 5699 | TRUE |
| 587936 | 2020-04-20 00:33:21 | 4999 | NA | 4999 | TRUE |
| 587936 | 2020-04-13 00:26:10 | 4999 | NA | 4999 | TRUE |
| 587936 | 2020-03-30 00:28:27 | 5499 | NA | 5499 | TRUE |
| 587936 | 2020-03-23 00:27:19 | 5499 | NA | 5499 | TRUE |
| 587936 | 2020-03-16 00:33:52 | 4890 | NA | 4890 | TRUE |
| 587936 | 2020-03-09 00:26:23 | 4890 | NA | 4890 | TRUE |
| 587936 | 2020-03-02 00:28:37 | 5499 | NA | 5499 | TRUE |
| 587936 | 2020-02-24 00:26:43 | 4890 | NA | 4890 | TRUE |
| 587936 | 2020-02-17 00:44:01 | 5499 | NA | 5499 | TRUE |
| 587936 | 2020-02-10 00:43:40 | 5499 | NA | 5499 | TRUE |
| 587936 | 2020-02-03 00:36:19 | 5499 | NA | 5499 | TRUE |
Price variability by retailer
This function allows the user to know if the price of n_products products randomly selected for a certain retailer changed more or equal than price_var times in the sampling time period.
retailer_variability({retailer ID or list of retailers IDs}, {price_var}, {n_products})
By default n_products=10 and price_var=1
You can also see the changes (if exist) on availability or price promotion.
retailer_variability <- function(ret_id, price_var=1, n_products=10) {
output=data.frame()
var_not_found =c()
#For each retailer ID, get randomly #n_products products and detect variability
for (ret in ret_id){
products <- c(retailer_products(ret) [['product_id']])
prod_ids <- c(sample(products, n_products, replace=FALSE))
for (prod in prod_ids){
getprod <- (product_variability(prod,price_var))
if (nrow(getprod)>0){
output <- rbind(output,getprod)
}
}
}
#Print details if no variability found
if (nrow(output)>0){
var_not_found <- ret_id[!(ret_id %in% output[['retailer_id']])]
if (length(var_not_found)>1){
cat("Nothing found for retailers: ",var_not_found, sep=" ")
}
else if (length(var_not_found)==1) {
cat("Nothing found for retailer: ",var_not_found)
}
}
else{
print("No variability found")
}
output
}
Example
retailer_variability(71)
| retailer_id | product_id | product | price_range | price_variability | avail_variability | prom_variability | total_samples | date_range |
|---|---|---|---|---|---|---|---|---|
| 71 | 1387108 | Canon EF-S 55-250 mm f4-5.6 IS STM Lens – Lenses – Foto Discount World | 6100-7000 | 4 | NA | 4 | 10 | Mar-20-May-20 |
| 71 | 1385898 | Defy Cooker Hood 600 STD B (DCH290) | Delivery Charge Excluded | 1000-1100 | 2 | NA | 2 | 10 | Mar-20-May-20 |
| 71 | 1387150 | Nikon 50mm f1.4G AF-S Lens – Lenses – Foto Discount World | 8600-10800 | 3 | NA | 4 | 10 | Mar-20-May-20 |
| 71 | 1385866 | Meade Infinity 50mm Altazimuth Refractor + Free Delivery | 920-1095 | 1 | NA | NA | 10 | Mar-20-May-20 |
| 71 | 1385491 | Canon Filter 82mm Protector + Free Delivery | 1600-1700 | 3 | NA | 4 | 10 | Mar-20-May-20 |
| 71 | 1383973 | Fujifilm XF 35mm F1.4 R Lens – Lenses – Foto Discount World | 8900-11200 | 3 | NA | 4 | 10 | Mar-20-May-20 |
| 71 | 1384171 | Sigma 16mm f/1.4 DC DN Contemporary Lens for Micro Four Thirds + Free Delivery | 7100-8000 | 3 | NA | 4 | 10 | Mar-20-May-20 |
You can also pass a list of retailers IDs:
retailer_variability(c(71,24,3,52))
Nothing found for retailer: 3
| retailer_id | product_id | product | price_range | price_variability | avail_variability | prom_variability | total_samples | date_range |
|---|---|---|---|---|---|---|---|---|
| 71 | 1386116 | Sigma 52mm WR UV filter + Free Delivery | 700-800 | 3 | NA | 3 | 10 | Mar-20-May-20 |
| 71 | 1384327 | Canon Zoemini C – Instant Camera Printer ( Bumblebee Yellow ) – Cameras – Foto Discount World | 2000-2400 | 3 | NA | 4 | 10 | Mar-20-May-20 |
| 71 | 1382284 | Nikon AF-S 70-200mm f4G ED VR Lens – Lenses – Foto Discount World | 0-30900 | 2 | NA | 1 | 10 | Mar-20-May-20 |
| 71 | 1387643 | Canon EOS C300 Mk II ( CFast Card ) EF Mount Video Camera – Video Cameras – Foto Discount World | 139100-153400 | 4 | NA | 4 | 10 | Mar-20-May-20 |
| 24 | 96497 | Arroll 4-Column Cast Iron Radiator 760 x 754mm Cream | 391.02-409.99 | 2 | NA | NA | 16 | Jan-20-May-20 |
| 24 | 246308 | Blyss Undertile Heating Mat 1m² | 19.99-39.99 | 2 | NA | 0 | 16 | Jan-20-May-20 |
| 52 | 588202 | AMPLIFY PRO FUSION BT H/P-AMP2011BKBL | 199.99-299.99 | 2 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 588168 | OVHD REPLACEMENT REMOTE CONTROL | 99.99-149.99 | 2 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 2268183 | HISENSE H12DESS S/STEEL 12PLC D/WASHE | 4299-4999 | 1 | NA | NA | 6 | Apr-20-May-20 |
| 52 | 588005 | HISENSE WTX1302T 13KG TITANIUM T/L | 4199-4799 | 6 | NA | NA | 14 | Feb-20-May-20 |
| 52 | 588914 | 3PCE HILTON WALL UNIT | 3900-4499 | 4 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 588460 | HP AMD ULTIMATE BUNDLE | 5690-5999 | 9 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 588001 | JVC XV-N430B HDMI DVD PLAYER | 450-699 | 6 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 588900 | ALPINE TV STAND | 1700-1999 | 8 | NA | NA | 15 | Feb-20-May-20 |
| 52 | 588842 | 152CM DREAM THERAPY BASE SET | 3990-5999 | 7 | NA | NA | 15 | Feb-20-May-20 |
Note: If you get “Nothing found for retailer {retailer_ID}” it doesn’t mean that there is no product with price variation >= price_var for this retailer. It means that it is no variation for the randomly selected n_products products for that retailer.
I’m feeling lucky
This function allows the user to know if the price of n_products randomly selected products for n_retailers randomly selected retailers changed more or equal than price_var times in the sampling time period.
feeling_lucky_variability({n_retailers}, {price_var}, {n_products})
By default n_retailers=5, n_products=10 and price_var=1
You can also see the change (if exist) on availability or price promotion.
feeling_lucky_variability <- function(n_retailers=5, price_var=1, n_products=10){
#For all the retailers ID, get randomly #n_retailers retailers
retailers <- retailer()[['retailer_id']]
ret_id <- c(sample(retailers, n_retailers, replace=FALSE))
#Check variability for all the randomly selected retailers
retailer_variability(ret_id, price_var, n_products)
}
Example
feeling_lucky_variability()
Nothing found for retailers: 64 86
| retailer_id | product_id | product | price_range | price_variability | avail_variability | prom_variability | total_samples | date_range |
|---|---|---|---|---|---|---|---|---|
| 63 | 900900 | Good Grips Pop 2 Rectangular Container - Wide Tall, 3.5L | 359-369 | 1 | 0 | NA | 69 | Feb-20-May-20 |
| 63 | 905964 | Gourmet Chutney, 250ml | 49-52 | 1 | 0 | NA | 69 | Feb-20-May-20 |
| 59 | 516135 | Yellow Roses in Designer Box - 12 roses | 520-676 | 4 | NA | NA | 14 | Jan-20-May-20 |
| 59 | 1518697 | Personalised Polaroid Sweater - L | 520-540 | 1 | NA | NA | 9 | Mar-20-May-20 |
| 59 | 484893 | Personalised Vintage Pink Fleece Gown Gift - Medium | 1300-1495 | 3 | NA | NA | 8 | Jan-20-May-20 |
| 59 | 2697016 | HALLS FRUIT DRINK 1.25LT, MANGO'ORANG - Standard | 43-44 | 1 | NA | NA | 3 | Apr-20-May-20 |
| 59 | 496175 | Pink Party Combo Box - Standard | 390-405 | 1 | NA | NA | 14 | Jan-20-May-20 |
| 59 | 685649 | Personalised Gentleman Backsberg Wine - Brut mcc | 520-598 | 2 | NA | NA | 12 | Feb-20-May-20 |
| 59 | 636151 | Pastel Pink Floral Flair in Envelope Box - Purple | 611-769 | 4 | NA | NA | 12 | Feb-20-May-20 |
| 59 | 1224465 | Personalised Pineapple Beach Towel - Standard | 390-405 | 1 | NA | NA | 11 | Mar-20-May-20 |
| 59 | 476149 | Personalised Fierce Waterbottle And Racerback - Large | 520-598 | 3 | NA | NA | 13 | Jan-20-May-20 |
| 59 | 670078 | Personalised Leaf Foilage Stone Towel Set - Set of 3 | 910-1079 | 2 | NA | NA | 11 | Feb-20-May-20 |
| 95 | 2245608 | Nutriday Low Fat Guava Fruit Yoghurt 1kg | 29.99-32.49 | 1 | NA | NA | 41 | Apr-20-May-20 |
| 95 | 2732671 | Denny Country Casserole Flavoured Cook-In-Sauce Pouch 415g | 26.99-34.99 | 1 | NA | NA | 21 | May-20-May-20 |
| 95 | 2900189 | <span style=white-space:pre-wrap>Fresh Cut Berry & Melon Fruit Salad Pack 600g </span> | 54.99-59.99 | 1 | NA | NA | 7 | May-20-May-20 |
Plot price vs time
This function allows the user to get an interactive visualization about the price (and price promotion if applies) vs time of a certain product.
plot_prod_variability({product_id})
library(plotly)
plot_prod_variability<- function(prod){
#Get data about the product
p_prices <- product_prices(prod)
product <- product(prod)$product
retailer <- product(prod)$retailer_id
#Build an interactive plot
trace_0 <- p_prices[['price']]
trace_1 <- p_prices[['price_promotion']]
fig <- plot_ly(p_prices, x = p_prices[['time']])
fig <- fig %>% add_trace(y = ~trace_0, name = 'price',mode = 'lines')
fig <- fig %>% add_trace(y = ~trace_1, name = 'price promotion', mode = 'lines')
fig <- fig %>%
layout(autosize = F,
width = 600,
height = 350,
margin = list(l=50, r=50, b=100, t=150, pad=4),
title = list(text = paste0(paste("Product: ",product),
'<br>',
'<br>',
'<sup>',
paste("Product ID: ",toString(prod),", Retailer ID: ",product(prod)$retailer_id),
'</sup>')),
xaxis = list(title="Date"),
yaxis = list(title="Price")
)
return(fig)
}
Example
plot_prod_variability(1386116)